library(mosaic)
Loading required package: dplyr
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Loading required package: lattice
Loading required package: ggformula
Loading required package: ggplot2
Loading required package: ggstance
Attaching package: ‘ggstance’
The following objects are masked from ‘package:ggplot2’:
geom_errorbarh, GeomErrorbarh
New to ggformula? Try the tutorials:
learnr::run_tutorial("introduction", package = "ggformula")
learnr::run_tutorial("refining", package = "ggformula")
Loading required package: mosaicData
Loading required package: Matrix
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
Attaching package: ‘mosaic’
The following object is masked from ‘package:Matrix’:
mean
The following object is masked from ‘package:ggplot2’:
stat
The following objects are masked from ‘package:dplyr’:
count, do, tally
The following objects are masked from ‘package:stats’:
binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile, sd, t.test, var
The following objects are masked from ‘package:base’:
max, mean, min, prod, range, sample, sum
library(tidyverse)
[30m── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──[39m
[30m[32m✓[30m [34mtibble [30m 2.1.3 [32m✓[30m [34mpurrr [30m 0.3.3
[32m✓[30m [34mtidyr [30m 1.0.2 [32m✓[30m [34mstringr[30m 1.4.0
[32m✓[30m [34mreadr [30m 1.3.1 [32m✓[30m [34mforcats[30m 0.5.0[39m
[30m── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[30m [34mmosaic[30m::[32mcount()[30m masks [34mdplyr[30m::count()
[31mx[30m [34mpurrr[30m::[32mcross()[30m masks [34mmosaic[30m::cross()
[31mx[30m [34mmosaic[30m::[32mdo()[30m masks [34mdplyr[30m::do()
[31mx[30m [34mtidyr[30m::[32mexpand()[30m masks [34mMatrix[30m::expand()
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mggstance[30m::[32mgeom_errorbarh()[30m masks [34mggplot2[30m::geom_errorbarh()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()
[31mx[30m [34mtidyr[30m::[32mpack()[30m masks [34mMatrix[30m::pack()
[31mx[30m [34mmosaic[30m::[32mstat()[30m masks [34mggplot2[30m::stat()
[31mx[30m [34mmosaic[30m::[32mtally()[30m masks [34mdplyr[30m::tally()
[31mx[30m [34mtidyr[30m::[32munpack()[30m masks [34mMatrix[30m::unpack()[39m
library(lubridate)
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
library(DataComputing)
library(rvest)
Loading required package: xml2
Attaching package: ‘rvest’
The following object is masked from ‘package:purrr’:
pluck
The following object is masked from ‘package:readr’:
guess_encoding
library(broom)
How do different indicators of a country's level of development (such as birth rate, population, and death rate) manifest themselves in the spread of COVID-19? Essentially, how does the impact of COVID-19 differ across countries?
Reading in the Data:
Data Source 1: COVID
# Load the per-country COVID-19 cases/deaths-per-million table.
# NOTE(review): the resulting column names ("total.covid.cases.deaths.per.million",
# "X", "X.1", ...) suggest the file starts with a title line, so the real header
# ends up as data row 1. `skip = 1` would read the header properly, but the
# renames applied later in this script would then need to change too.
COVID <- read.csv(file = "total-covid-cases-deaths-per-million.csv")
COVID
COVID %>%
nrow()
[1] 9487
COVID %>%
names()
[1] "total.covid.cases.deaths.per.million" "X"
[3] "X.1" "X.2"
[5] "X.3" "X.4"
[7] "X.5" "X.6"
[9] "X.7" "X.8"
[11] "X.9" "X.10"
[13] "X.11" "X.12"
[15] "X.13" "X.14"
[17] "X.15" "X.16"
[19] "X.17" "X.18"
[21] "X.19" "X.20"
[23] "X.21" "X.22"
[25] "X.23" "X.24"
[27] "X.25" "X.26"
[29] "X.27" "X.28"
[31] "X.29" "X.30"
[33] "X.31" "X.32"
[35] "X.33" "X.34"
[37] "X.35" "X.36"
[39] "X.37" "X.38"
[41] "X.39" "X.40"
[43] "X.41" "X.42"
[45] "X.43" "X.44"
[47] "X.45" "X.46"
[49] "X.47" "X.48"
[51] "X.49" "X.50"
[53] "X.51" "X.52"
[55] "X.53" "X.54"
[57] "X.55" "X.56"
[59] "X.57" "X.58"
[61] "X.59" "X.60"
[63] "X.61" "X.62"
[65] "X.63" "X.64"
[67] "X.65" "X.66"
[69] "X.67" "X.68"
[71] "X.69" "X.70"
[73] "X.71" "X.72"
[75] "X.73" "X.74"
[77] "X.75" "X.76"
[79] "X.77" "X.78"
[81] "X.79" "X.80"
[83] "X.81" "X.82"
[85] "X.83" "X.84"
[87] "X.85" "X.86"
[89] "X.87" "X.88"
[91] "X.89" "X.90"
[93] "X.91" "X.92"
[95] "X.93" "X.94"
[97] "X.95" "X.96"
[99] "X.97" "X.98"
[101] "X.99" "X.100"
[103] "X.101" "X.102"
[105] "X.103" "X.104"
[107] "X.105" "X.106"
[109] "X.107" "X.108"
[111] "X.109" "X.110"
[113] "X.111" "X.112"
[115] "X.113" "X.114"
[117] "X.115" "X.116"
[119] "X.117" "X.118"
[121] "X.119" "X.120"
[123] "X.121" "X.122"
[125] "X.123" "X.124"
[127] "X.125" "X.126"
[129] "X.127" "X.128"
[131] "X.129" "X.130"
[133] "X.131" "X.132"
[135] "X.133" "X.134"
[137] "X.135" "X.136"
[139] "X.137" "X.138"
[141] "X.139" "X.140"
[143] "X.141" "X.142"
[145] "X.143" "X.144"
[147] "X.145" "X.146"
[149] "X.147" "X.148"
[151] "X.149" "X.150"
[153] "X.151" "X.152"
[155] "X.153" "X.154"
[157] "X.155" "X.156"
[159] "X.157" "X.158"
[161] "X.159" "X.160"
[163] "X.161" "X.162"
[165] "X.163" "X.164"
[167] "X.165" "X.166"
[169] "X.167" "X.168"
[171] "X.169" "X.170"
[173] "X.171" "X.172"
[175] "X.173" "X.174"
[177] "X.175" "X.176"
[179] "X.177" "X.178"
[181] "X.179" "X.180"
[183] "X.181" "X.182"
[185] "X.183" "X.184"
[187] "X.185" "X.186"
[189] "X.187" "X.188"
[191] "X.189" "X.190"
[193] "X.191" "X.192"
[195] "X.193" "X.194"
[197] "X.195" "X.196"
[199] "X.197" "X.198"
[201] "X.199" "X.200"
[203] "X.201" "X.202"
[205] "X.203" "X.204"
[207] "X.205" "X.206"
[209] "X.207" "X.208"
[211] "X.209" "X.210"
[213] "X.211" "X.212"
[215] "X.213" "X.214"
[217] "X.215" "X.216"
[219] "X.217" "X.218"
[221] "X.219" "X.220"
[223] "X.221" "X.222"
[225] "X.223" "X.224"
[227] "X.225" "X.226"
[229] "X.227" "X.228"
[231] "X.229" "X.230"
[233] "X.231" "X.232"
[235] "X.233" "X.234"
[237] "X.235" "X.236"
[239] "X.237" "X.238"
[241] "X.239" "X.240"
[243] "X.241" "X.242"
[245] "X.243" "X.244"
[247] "X.245" "X.246"
[249] "X.247" "X.248"
[251] "X.249" "X.250"
[253] "X.251" "X.252"
[255] "X.253" "X.254"
COVID %>%
head()
Data Source 2: CountryData
CountryData
CountryData %>%
nrow()
[1] 256
CountryData %>%
names()
[1] "country" "area" "pop" "growth" "birth"
[6] "death" "migr" "maternal" "infant" "life"
[11] "fert" "health" "HIVrate" "HIVpeople" "HIVdeath"
[16] "obesity" "underweight" "educ" "unemploymentYouth" "GDP"
[21] "GDPgrowth" "GDPcapita" "saving" "indProd" "labor"
[26] "unemployment" "family" "tax" "budget" "debt"
[31] "inflation" "discount" "lending" "narrow" "broad"
[36] "credit" "shares" "balance" "exports" "imports"
[41] "gold" "externalDebt" "homeStock" "abroadStock" "elecProd"
[46] "elecCons" "elecExp" "elecImp" "elecCap" "elecFossil"
[51] "elecNuc" "elecHydro" "elecRenew" "oilProd" "oilExp"
[56] "oilImp" "oilRes" "petroProd" "petroCons" "petroExp"
[61] "petroImp" "gasProd" "gasCons" "gasExp" "gasImp"
[66] "gasRes" "mainlines" "cell" "netHosts" "netUsers"
[71] "airports" "railways" "roadways" "waterways" "marine"
[76] "military"
CountryData %>%
head()
Data Source 3: Continents
# Load the country -> continent lookup table.
# By default read.csv() treats the literal string "NA" as a missing value.
# Presumably the Continent column holds two-letter codes, in which case "NA"
# means North America and must be kept as text (TODO confirm against the file).
# Only empty fields are treated as missing here.
Continents <- read.csv(file = "countries and continents.csv", na.strings = "")
Continents
Continents %>%
nrow()
[1] 251
Continents %>%
names()
[1] "name" "official_name_en" "official_name_fr"
[4] "ISO3166.1.Alpha.2" "ISO3166.1.Alpha.3" "M49"
[7] "ITU" "MARC" "WMO"
[10] "DS" "Dial" "FIFA"
[13] "FIPS" "GAUL" "IOC"
[16] "ISO4217.currency_alphabetic_code" "ISO4217.currency_country_name" "ISO4217.currency_minor_unit"
[19] "ISO4217.currency_name" "ISO4217.currency_numeric_code" "is_independent"
[22] "Capital" "Continent" "TLD"
[25] "Languages" "Geoname.ID" "EDGAR"
Continents %>%
head()
COVID
# Tidy the raw COVID table: give the five useful columns meaningful names,
# drop the first data row, and convert every column to a proper type.
TidyCOVID <- COVID %>%
  rename(
    country = total.covid.cases.deaths.per.million,
    Code = X,
    Date = X.1,
    DeathsPerMillion = X.2,
    CasesPerMillion = X.3
  ) %>%
  # Row 1 is discarded (presumably it holds the file's own header line, given
  # the placeholder column names -- confirm against the CSV).
  filter(row_number() > 1) %>%
  select(country, Code, Date, DeathsPerMillion, CasesPerMillion) %>%
  mutate(
    country = as.character(country),
    Code = as.character(Code),
    Date = mdy(as.character(Date)),
    # Go through as.character() first: calling as.integer() on a factor (what
    # read.csv() returns for text columns before R 4.0) yields the internal
    # level codes, not the recorded values, and would also drop decimals.
    DeathsPerMillion = as.numeric(as.character(DeathsPerMillion)),
    CasesPerMillion = as.numeric(as.character(CasesPerMillion))
  )
TidyCOVID
TidyCOVID
# Per-million deaths and cases for each country on 1 February 2020.
February1 <- TidyCOVID %>%
  filter(Date == as.Date("2020-02-01")) %>%
  select(
    country,
    DeathsPerMillionFeb1 = DeathsPerMillion,
    CasesPerMillionFeb1 = CasesPerMillion
  )

# Per-million deaths and cases for each country on 29 February 2020.
February29 <- TidyCOVID %>%
  filter(Date == as.Date("2020-02-29")) %>%
  select(
    country,
    DeathsPerMillionFeb29 = DeathsPerMillion,
    CasesPerMillionFeb29 = CasesPerMillion
  )

# Change over February: end-of-month value minus start-of-month value.
FebGrow <- February1 %>%
  full_join(February29, by = "country") %>%
  mutate(
    CasesPerMillionFebGrowth = CasesPerMillionFeb29 - CasesPerMillionFeb1,
    DeathsPerMillionFebGrowth = DeathsPerMillionFeb29 - DeathsPerMillionFeb1
  )
# Per-million deaths and cases for each country on 1 March 2020.
March1 <- TidyCOVID %>%
  filter(Date == as.Date("2020-03-01")) %>%
  select(
    country,
    DeathsPerMillionMarch1 = DeathsPerMillion,
    CasesPerMillionMarch1 = CasesPerMillion
  )

# Per-million deaths and cases for each country on 31 March 2020.
March31 <- TidyCOVID %>%
  filter(Date == as.Date("2020-03-31")) %>%
  select(
    country,
    DeathsPerMillionMarch31 = DeathsPerMillion,
    CasesPerMillionMarch31 = CasesPerMillion
  )

# Change over March: end-of-month value minus start-of-month value.
MarchGrow <- March1 %>%
  full_join(March31, by = "country") %>%
  mutate(
    CasesPerMillionMarchGrowth =
      CasesPerMillionMarch31 - CasesPerMillionMarch1,
    DeathsPerMillionMarchGrowth =
      DeathsPerMillionMarch31 - DeathsPerMillionMarch1
  )
# Per-million deaths and cases for each country on 31 December 2019.
December31 <- TidyCOVID %>%
  filter(Date == as.Date("2019-12-31")) %>%
  select(
    country,
    DeathsPerMillionDecember31 = DeathsPerMillion,
    CasesPerMillionDecember31 = CasesPerMillion
  )

# Per-million deaths and cases for each country on 5 April 2020.
April5 <- TidyCOVID %>%
  filter(Date == as.Date("2020-04-05")) %>%
  select(
    country,
    DeathsPerMillionApril5 = DeathsPerMillion,
    CasesPerMillionApril5 = CasesPerMillion
  )

# Change over the whole period: 5 April value minus 31 December value.
TotalGrow <- December31 %>%
  full_join(April5, by = "country") %>%
  mutate(
    CasesPerMillionTotalGrowth =
      CasesPerMillionApril5 - CasesPerMillionDecember31,
    DeathsPerMillionTotalGrowth =
      DeathsPerMillionApril5 - DeathsPerMillionDecember31
  )
# Keep only the country key and the two growth columns of each table,
# discarding the start/end snapshot columns used to compute them.
FebGrow <- select(
  FebGrow,
  country, CasesPerMillionFebGrowth, DeathsPerMillionFebGrowth
)
MarchGrow <- select(
  MarchGrow,
  country, CasesPerMillionMarchGrowth, DeathsPerMillionMarchGrowth
)
TotalGrow <- select(
  TotalGrow,
  country, CasesPerMillionTotalGrowth, DeathsPerMillionTotalGrowth
)
# Combine the February, March and whole-period growth figures into one table
# with a single row per country. The join key is stated explicitly so the join
# does not depend on (or print a message about) whichever columns happen to
# share a name.
# NOTE(review): left joins keep only countries present in FebGrow; a country
# with no row on either February date is dropped here -- confirm that is
# intended.
Growth <-
  FebGrow %>%
  left_join(MarchGrow, by = "country") %>%
  left_join(TotalGrow, by = "country")
Joining, by = "country"
Joining, by = "country"
Growth
Continents
# Reduce the continent lookup to two character columns: `country` (taken from
# `name`) and `Continent`. The first two rows are discarded (the reason is not
# visible in this script).
TidyCont <- Continents %>%
  filter(row_number() > 2) %>%
  select(country = name, Continent) %>%
  mutate(
    country = as.character(country),
    Continent = as.character(Continent)
  )
TidyCont
# Attach each country's continent to the daily COVID records, then add the
# CountryData indicators; both joins keep every COVID row.
COVIDwCont <- TidyCOVID %>%
  left_join(TidyCont, by = "country")
DataMix <- COVIDwCont %>%
  left_join(CountryData, by = "country")
JOEY: I think this is a good plot to start with, but if there is some way you could make it clearer (color coding, etc.), that would be great. This is a good start to our look at the data.
# Cases per million over time, one panel per continent.
DataMix %>%
  ggplot(aes(x = Date, y = CasesPerMillion)) +
  geom_point() +
  facet_wrap(~Continent)
JOEY: I like this graph because it visualizes one of the factors that contributes to the development classification. Can we add a color code, or something similar, to represent one more factor, such as different countries or regions of the world? The more we can do with this one, the better.
# Deaths per million over time, one panel per continent.
DataMix %>%
  ggplot(aes(x = Date, y = DeathsPerMillion)) +
  geom_point() +
  facet_wrap(~Continent)
# Add the CountryData indicators to the per-country growth table.
GrowthMix <- Growth %>%
  left_join(CountryData, by = "country")

# Total case growth against number of airports; the x axis is limited to
# 0-5000, so countries outside that range are not drawn.
GrowthMix %>%
  ggplot(aes(x = airports, y = CasesPerMillionTotalGrowth)) +
  geom_point() +
  scale_x_continuous(limits = c(0, 5000))

# Total case growth against the `health` indicator.
GrowthMix %>%
  ggplot(aes(x = health, y = CasesPerMillionTotalGrowth)) +
  geom_point()

# Total death growth against the `health` indicator.
GrowthMix %>%
  ggplot(aes(x = health, y = DeathsPerMillionTotalGrowth)) +
  geom_point()
GrowthMix %>%
arrange(desc(CasesPerMillionTotalGrowth))
# Turn the per-million growth figures into absolute counts for each country.
# Continent-level and world aggregates are removed so only countries remain.
ShowChange <-
  GrowthMix %>%
  select(
    country,
    CasesPerMillionTotalGrowth,
    DeathsPerMillionTotalGrowth,
    pop
  ) %>%
  filter(
    !country %in% c(
      "Africa", "Asia", "Europe", "North America",
      "Oceania", "South America", "World"
    )
  ) %>%
  mutate(
    # Scale by the exact population in millions and round the final count.
    # Rounding the population to whole millions first would zero out every
    # country with fewer than 500,000 people and distort the rest.
    TotalGrowth = round(CasesPerMillionTotalGrowth * pop / 1000000),
    TotalDeath = round(DeathsPerMillionTotalGrowth * pop / 1000000)
  )
ShowChange %>%
arrange(desc(TotalDeath))
WorldMap(ShowChange, key = country, fill = TotalGrowth)
Mapping API still under development and may change in future releases.